library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(countrycode)
theme_HS<-list(theme(plot.title = element_text(lineheight=1, size=15, face="bold"), #set parameters of title
                     plot.subtitle = element_text(lineheight=1, size=12, face="bold"), # set parameters of subtitle
                     plot.caption = element_text(lineheight=1, size=13, hjust=1), # set parameters of caption
                     legend.title = element_blank (), # legend title (we don't want a title for the legens)
                     legend.text = element_text(colour="black", size = 15), # set parameters of legend text
                     legend.position="bottom", # set position of legend
                     # legend.justification=c(1,0), # set justification of legend
                     legend.background = element_rect(fill=NA, colour = NA), # set legend background
                     legend.key.size = unit(1.5, 'lines'), # set size of simbols of the legend
                     legend.key = element_rect(colour = NA, fill = NA), #set background of simbols in the legend
                     axis.title.x = element_blank (), # set x axis title (we don't want a title for the legend)
                     axis.text.x  = element_text(angle = 0,vjust=0.5, size=15,colour="black"), #set parameters of x axis text
                     axis.title.y = element_text(vjust=2, size=15,colour="black"), # set y axis title 
                     axis.text.y  = element_text(vjust=0.5, size=15,colour="black"), #set parameters of y axis text
                     strip.text = element_text(size=15, face="bold"), #text for facets
                     plot.background =  element_rect(fill = "white"), # set color of the background of the plot
                     panel.grid.major=element_line(colour="#E6E6E6",linewidth=.5), # set color of major grid lines
                     panel.grid.minor=element_line(colour="#E6E6E6",linewidth=.15), # set color of minor grid lines
                     panel.border = element_rect(colour = "#585858", fill=NA, linewidth=.75), #set color of panel border line
                     panel.background =element_rect(fill ="#FFFFFF", colour = "#FFFFFF"))) # set color of the background of the panel of the plot
icpc_data <- read.csv("data/icpc-full.csv")

icpc_data$Year <- as.numeric(icpc_data$Year)

participation_summary <- icpc_data %>%
  group_by(Year) %>%
  summarize(
    num_countries = n_distinct(Country),
    num_teams = n_distinct(Team)
  )

head(participation_summary)
## # A tibble: 6 × 3
##    Year num_countries num_teams
##   <dbl>         <int>     <int>
## 1  1999            21        62
## 2  2000            27        60
## 3  2001            27        64
## 4  2002            27        64
## 5  2003            25        68
## 6  2004            31        73
plot <- ggplot(participation_summary, aes(x = Year, y = num_countries)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "steelblue", size = 2) +
  annotate("rect", xmin = 2020, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.3, fill = "red") +
  annotate("text", x = 2020, y = 55, 
           label = "COVID-19\nPandemic", vjust = 1, hjust = -0.2, color = "red", size = 5, bold = TRUE) +
scale_x_continuous(breaks = seq(1999, 2023, by = 3), limits = c(1999, 2023)) +
  labs(
    title = "Growth in Number of Participating Countries from 1999 to present",
    x = "Year",
    y = "Number of Countries"
  ) +
  theme_HS
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in annotate("text", x = 2020, y = 55, label = "COVID-19\nPandemic", :
## Ignoring unknown parameters: `bold`
plot

ggsave("plot.png", plot, width = 10, height = 6, units = "in", dpi = 300)
plot <- ggplot(participation_summary, aes(x = Year, y = num_teams)) +
  geom_line(color = "darkorange", size = 1) +
  geom_point(color = "darkorange", size = 2) +
  annotate("rect", xmin = 2020, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.3, fill = "red") +
  annotate("text", x = 2020, y = 150, 
           label = "COVID-19\nPandemic", vjust = 1, hjust = -0.2, color = "red", size = 5, bold = TRUE) +
  scale_x_continuous(breaks = seq(1999, 2023, by = 3), limits = c(1999, 2023)) +
  labs(
    title = "Growth in Number of Participating Teams from 1999 to present",
    x = "Year",
    y = "Number of Teams"
  ) +
  theme_HS
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in annotate("text", x = 2020, y = 150, label = "COVID-19\nPandemic", :
## Ignoring unknown parameters: `bold`
plot

ggsave("plot2.png", plot, width = 10, height = 6, units = "in", dpi = 300)
icpc_data <- icpc_data %>%
  mutate(Gold = as.logical(Gold),
         Silver = as.logical(Silver),
         Bronze = as.logical(Bronze))


icpc_data <- icpc_data %>%
  mutate(Total_Medals = Gold + Silver + Bronze)

print(sum(icpc_data$Total_Medals))
## [1] 317
university_medals <- icpc_data %>%
  group_by(University, Country) %>%
  summarize(Total_Medals = sum(Total_Medals, na.rm = TRUE)) %>%
  arrange(desc(Total_Medals)) %>%
  head(10) %>%
  mutate(University_Country = paste(University, " (", Country, ")", sep = ""))
## `summarise()` has grouped output by 'University'. You can override using the
## `.groups` argument.
country_medals <- icpc_data %>%
  group_by(Country) %>%
  summarize(Total_Medals = sum(Total_Medals, na.rm = TRUE)) %>%
  arrange(desc(Total_Medals)) %>%
  head(10)

print(university_medals)
## # A tibble: 10 × 4
## # Groups:   University [10]
##    University                            Country Total_Medals University_Country
##    <chr>                                 <chr>          <int> <chr>             
##  1 Tsinghua University                   China             16 Tsinghua Universi…
##  2 University of Warsaw                  Poland            16 University of War…
##  3 Moscow State University               Russia            15 Moscow State Univ…
##  4 Massachusetts Institute of Technology United…           14 Massachusetts Ins…
##  5 Shanghai Jiao Tong University         China             14 Shanghai Jiao Ton…
##  6 St. Petersburg State University       Russia            13 St. Petersburg St…
##  7 University of Waterloo                Canada            12 University of Wat…
##  8 The University of Tokyo               Japan             11 The University of…
##  9 National Taiwan University            Taiwan             8 National Taiwan U…
## 10 Peking University                     China              8 Peking University…
print(country_medals)
## # A tibble: 10 × 2
##    Country       Total_Medals
##    <chr>                <int>
##  1 Russia                  87
##  2 China                   53
##  3 United States           41
##  4 Poland                  23
##  5 Canada                  17
##  6 Japan                   14
##  7 South Korea             11
##  8 Taiwan                   8
##  9 Ukraine                  8
## 10 Belarus                  6
icpc_data <- icpc_data %>%
  mutate(Region = countrycode(Country, "country.name", "region"),
         Continent = countrycode(Country, "country.name", "continent"))

south_america_regions <- c("Latin America & Caribbean")
north_america_regions <- c("North America")

head(icpc_data)
##   Year       Date        Host      City                              Venue Rank
## 1 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    1
## 2 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    2
## 3 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    3
## 4 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    4
## 5 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    5
## 6 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    6
##                                            University       Country
## 1                              University of Waterloo        Canada
## 2                 Albert-Ludwigs-Universität Freiburg       Germany
## 3 St. Petersburg Institute of Fine Mechanics & Optics        Russia
## 4                             University of Bucharest       Romania
## 5                                     Duke University United States
## 6             California Polytechnic State University United States
##                  Team     Contestant.1   Contestant.2     Contestant.3  Gold
## 1          U Waterloo    David Kennedy  Ondrej Lhotak   Viet-Trung Luu  TRUE
## 2     Freiburg B Team                                                  FALSE
## 3 St. Petersburg IFMO Alexander Volkov Matvey Kazakov Vladimir Lyovkin FALSE
## 4    UNIBUC Bucharest                                                  FALSE
## 5    Duke Blue Devils                                                  FALSE
## 6       Cal Poly Gold                                                  FALSE
##   Silver Bronze Honorable Score Total Score.Percentage Penalty          Prize
## 1  FALSE  FALSE     False     6     8             0.75     948 World Champion
## 2   TRUE  FALSE     False     6     8             0.75     992               
## 3  FALSE   TRUE     False     6     8             0.75    1046               
## 4  FALSE   TRUE     False     6     8             0.75    1048               
## 5  FALSE   TRUE     False     6     8             0.75    1337               
## 6  FALSE   TRUE     False     5     8             0.62     724               
##   Total_Medals                Region Continent
## 1            1         North America  Americas
## 2            1 Europe & Central Asia    Europe
## 3            1 Europe & Central Asia    Europe
## 4            1 Europe & Central Asia    Europe
## 5            1         North America  Americas
## 6            1         North America  Americas
print(unique(icpc_data$Region))
## [1] "North America"              "Europe & Central Asia"     
## [3] "East Asia & Pacific"        "Latin America & Caribbean" 
## [5] "South Asia"                 "Middle East & North Africa"
## [7] "Sub-Saharan Africa"
icpc_data <- icpc_data %>%
  mutate(Continent = case_when(
    Region %in% south_america_regions ~ "South America",
    Region %in% north_america_regions ~ "North America",
    TRUE ~ Continent
  ))

continent_medals <- icpc_data %>%
  group_by(Continent) %>%
  summarize(Total_Medals = sum(Total_Medals, na.rm = TRUE)) %>%
  arrange(desc(Total_Medals))
plot <- ggplot(university_medals, aes(x = reorder(University_Country, Total_Medals), y = Total_Medals)) +
  geom_bar(stat = "identity", fill = "purple") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 16, by = 2)) +
  labs(
    title = "Top 10 Universities with Most Medals",
    y = "Total Medals",
    x = NULL
  ) +
    theme_HS

plot

ggsave("plot3.png", plot, width = 12, height = 6, units = "in", dpi = 300)
print(max(country_medals$Total_Medals))
## [1] 87
plot <- ggplot(country_medals, aes(x = reorder(Country, Total_Medals), y = Total_Medals)) +
  geom_bar(stat = "identity", fill = "darkorange") +
  scale_y_continuous(breaks = seq(0, 87, by = 5)) +
  coord_flip() +
  labs(
    title = "Top 10 Countries with Most Medals",
    y = "Total Medals",
    x = NULL
  ) +
    theme_HS

plot

ggsave("plot4.png", plot, width = 12, height = 6, units = "in", dpi = 300)
print(max(continent_medals$Total_Medals))
## [1] 156
plot <- ggplot(continent_medals, aes(x = reorder(Continent, Total_Medals), y = Total_Medals)) +
  geom_bar(stat = "identity", fill = "forestgreen") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 156, by = 12)) + 
  labs(
    title = "Total Medals by Continent",
    x = NULL,
    y = "Total Medals"
  ) +
  theme_HS

plot

ggsave("plot5.png", plot, width = 12, height = 6, units = "in", dpi = 300)

total_medals <- sum(continent_medals$Total_Medals)

print(total_medals)
## [1] 317
library(broom)
library(tidyr)

icpc_data <- read.csv("data/icpc-full.csv")

icpc_data <- icpc_data %>%
  mutate(Gold = as.logical(Gold),
         Silver = as.logical(Silver),
         Bronze = as.logical(Bronze))

icpc_data <- icpc_data %>%
  mutate(Total_Medals = Gold + Silver + Bronze)

country_year_medals <- icpc_data %>%
  group_by(Country, Year) %>%
  summarize(Total_Medals = sum(Total_Medals, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'Country'. You can override using the
## `.groups` argument.
all_years <- seq(min(country_year_medals$Year), max(country_year_medals$Year))
all_countries <- unique(country_year_medals$Country)

all_combinations <- expand.grid(Country = all_countries, Year = all_years)

country_year_medals_complete <- all_combinations %>%
  left_join(country_year_medals, by = c("Country", "Year")) %>%
  mutate(Total_Medals = ifelse(is.na(Total_Medals), 0, Total_Medals))

get_slope <- function(data) {
  model <- lm(Total_Medals ~ Year, data = data)
  slope <- coef(model)["Year"]
  return(slope)
}

country_slopes <- country_year_medals_complete %>%
  group_by(Country) %>%
  summarize(Slope = get_slope(cur_data_all())) %>%
  arrange(desc(Slope))
## Warning: There was 1 warning in `summarize()`.
## ℹ In argument: `Slope = get_slope(cur_data_all())`.
## ℹ In group 1: `Country = "Afghanistan"`.
## Caused by warning:
## ! `cur_data_all()` was deprecated in dplyr 1.1.0.
## ℹ Please use `pick()` instead.
top_4_countries <- country_slopes %>%
  top_n(4, Slope) %>%
  pull(Country)

top_4_countries_data <- country_year_medals_complete %>%
  filter(Country %in% top_4_countries)

top_4_countries_data$Country <- factor(top_4_countries_data$Country, levels = top_4_countries)


facet_plot <- ggplot(top_4_countries_data, aes(x = Year, y = Total_Medals)) +
  geom_line() +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  scale_y_continuous(breaks = seq(0, 2, by = 1), limits = c(0, 2)) +
  scale_x_continuous(breaks = seq(1999, 2023, by = 4), limits = c(1999, 2023)) +
  facet_wrap(~ Country, scales = "free_y") +
  labs(
    title = "Medals Over Years for Top 4 Countries with Highest Slopes",
    x = NULL,
    y = "Total Medals"
  ) + theme_HS

top_10_countries <- country_slopes %>%
  top_n(10, Slope)

bar_plot <- ggplot(top_10_countries, aes(x = reorder(Country, Slope), y = Slope)) +
  geom_bar(stat = "identity", fill = "dodgerblue") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 0.4, by = 0.01)) +
  labs(
    title = "Top 10 Countries by Linear Regression Slope",
    subtitle = "Indicating the Performance Trend Over Years",
    x = NULL,
    y = "Slope (Performance Trend)"
  ) + theme_HS

facet_plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing missing values (`geom_smooth()`).

bar_plot

ggsave("plot7.png", facet_plot, width = 12, height = 8, units = "in", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing missing values (`geom_smooth()`).
ggsave("plot6.png", bar_plot, width = 12, height = 8, units = "in", dpi = 300)
icpc_data <- read.csv("data/icpc-full.csv")

head(icpc_data)
##   Year       Date        Host      City                              Venue Rank
## 1 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    1
## 2 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    2
## 3 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    3
## 4 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    4
## 5 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    5
## 6 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology    6
##                                            University       Country
## 1                              University of Waterloo        Canada
## 2                 Albert-Ludwigs-Universität Freiburg       Germany
## 3 St. Petersburg Institute of Fine Mechanics & Optics        Russia
## 4                             University of Bucharest       Romania
## 5                                     Duke University United States
## 6             California Polytechnic State University United States
##                  Team     Contestant.1   Contestant.2     Contestant.3  Gold
## 1          U Waterloo    David Kennedy  Ondrej Lhotak   Viet-Trung Luu  True
## 2     Freiburg B Team                                                  False
## 3 St. Petersburg IFMO Alexander Volkov Matvey Kazakov Vladimir Lyovkin False
## 4    UNIBUC Bucharest                                                  False
## 5    Duke Blue Devils                                                  False
## 6       Cal Poly Gold                                                  False
##   Silver Bronze Honorable Score Total Score.Percentage Penalty          Prize
## 1  False  False     False     6     8             0.75     948 World Champion
## 2   True  False     False     6     8             0.75     992               
## 3  False   True     False     6     8             0.75    1046               
## 4  False   True     False     6     8             0.75    1048               
## 5  False   True     False     6     8             0.75    1337               
## 6  False   True     False     5     8             0.62     724
total_teams_per_year <- icpc_data %>%
  group_by(Year) %>%
  summarize(Total_Teams_Year = n())

ranking_data <- icpc_data %>%
  group_by(Country, Year) %>%
  summarize(Average_Rank = ifelse(all(is.na(Rank)), NA, mean(Rank, na.rm = TRUE))) %>%
  ungroup()
## `summarise()` has grouped output by 'Country'. You can override using the
## `.groups` argument.
ranking_data <- ranking_data %>%
  left_join(total_teams_per_year, by = "Year")

ranking_data <- ranking_data %>%
  mutate(Average_Rank = ifelse(is.na(Average_Rank), Total_Teams_Year, Average_Rank))

print(ranking_data)
## # A tibble: 884 × 4
##    Country      Year Average_Rank Total_Teams_Year
##    <chr>       <int>        <dbl>            <int>
##  1 Afghanistan  2020          115              117
##  2 Argentina    1999           38               62
##  3 Argentina    2000           27               60
##  4 Argentina    2001           51               64
##  5 Argentina    2002           10               64
##  6 Argentina    2003           12               68
##  7 Argentina    2004           53               73
##  8 Argentina    2005           44               78
##  9 Argentina    2006           56               83
## 10 Argentina    2007           14               87
## # ℹ 874 more rows
ranking_data <- ranking_data %>%
  mutate(Scaled_Rank = (Average_Rank / Total_Teams_Year) * 100)

all_years <- seq(min(ranking_data$Year), max(ranking_data$Year))
all_countries <- unique(ranking_data$Country)

all_combinations <- expand.grid(Country = all_countries, Year = all_years)

ranking_data_complete <- all_combinations %>%
  left_join(ranking_data, by = c("Country", "Year")) %>%
  mutate(Scaled_Rank = ifelse(is.na(Scaled_Rank), 100, Scaled_Rank))

get_slope <- function(data) {
  model <- lm(Scaled_Rank ~ Year, data = data)
  slope <- coef(model)["Year"]
  return(slope)
}

country_slopes <- ranking_data_complete %>%
  group_by(Country) %>%
  summarize(Slope = get_slope(cur_data_all())) %>%
  arrange(Slope)
## Warning: There was 1 warning in `summarize()`.
## ℹ In argument: `Slope = get_slope(cur_data_all())`.
## ℹ In group 1: `Country = "Afghanistan"`.
## Caused by warning:
## ! `cur_data_all()` was deprecated in dplyr 1.1.0.
## ℹ Please use `pick()` instead.
print(country_slopes)
## # A tibble: 78 × 2
##    Country        Slope
##    <chr>          <dbl>
##  1 Switzerland    -4.12
##  2 United Kingdom -3.64
##  3 Serbia         -3.05
##  4 Indonesia      -2.63
##  5 France         -2.54
##  6 Viet Nam       -2.33
##  7 Ukraine        -2.24
##  8 Kazakhstan     -2.19
##  9 Cuba           -2.10
## 10 Latvia         -1.77
## # ℹ 68 more rows
top_10_countries <- country_slopes %>%
  top_n(10, -Slope)

top_4_countries <- country_slopes %>%
  top_n(4, -Slope) %>%
  pull(Country)

top_4_countries_data <- ranking_data_complete %>%
  filter(Country %in% top_4_countries)

top_4_countries_data$Country <- factor(top_4_countries_data$Country, levels = top_4_countries)

print(top_10_countries)
## # A tibble: 10 × 2
##    Country        Slope
##    <chr>          <dbl>
##  1 Switzerland    -4.12
##  2 United Kingdom -3.64
##  3 Serbia         -3.05
##  4 Indonesia      -2.63
##  5 France         -2.54
##  6 Viet Nam       -2.33
##  7 Ukraine        -2.24
##  8 Kazakhstan     -2.19
##  9 Cuba           -2.10
## 10 Latvia         -1.77
print(top_4_countries_data)
##            Country Year Average_Rank Total_Teams_Year Scaled_Rank
## 1        Indonesia 1999           NA               NA  100.000000
## 2           Serbia 1999           NA               NA  100.000000
## 3      Switzerland 1999           NA               NA  100.000000
## 4   United Kingdom 1999           NA               NA  100.000000
## 5        Indonesia 2000           NA               NA  100.000000
## 6           Serbia 2000           NA               NA  100.000000
## 7      Switzerland 2000           NA               NA  100.000000
## 8   United Kingdom 2000           NA               NA  100.000000
## 9        Indonesia 2001           NA               NA  100.000000
## 10          Serbia 2001           NA               NA  100.000000
## 11     Switzerland 2001           NA               NA  100.000000
## 12  United Kingdom 2001           NA               NA  100.000000
## 13       Indonesia 2002           NA               NA  100.000000
## 14          Serbia 2002           NA               NA  100.000000
## 15     Switzerland 2002           NA               NA  100.000000
## 16  United Kingdom 2002           NA               NA  100.000000
## 17       Indonesia 2003           NA               NA  100.000000
## 18          Serbia 2003           NA               NA  100.000000
## 19     Switzerland 2003           NA               NA  100.000000
## 20  United Kingdom 2003           NA               NA  100.000000
## 21       Indonesia 2004           NA               NA  100.000000
## 22          Serbia 2004           NA               NA  100.000000
## 23     Switzerland 2004           NA               NA  100.000000
## 24  United Kingdom 2004           NA               NA  100.000000
## 25       Indonesia 2005           NA               NA  100.000000
## 26          Serbia 2005           NA               NA  100.000000
## 27     Switzerland 2005           NA               NA  100.000000
## 28  United Kingdom 2005           NA               NA  100.000000
## 29       Indonesia 2006           NA               NA  100.000000
## 30          Serbia 2006           NA               NA  100.000000
## 31     Switzerland 2006           NA               NA  100.000000
## 32  United Kingdom 2006           NA               NA  100.000000
## 33       Indonesia 2007         57.0               87   65.517241
## 34          Serbia 2007           NA               NA  100.000000
## 35     Switzerland 2007           NA               NA  100.000000
## 36  United Kingdom 2007           NA               NA  100.000000
## 37       Indonesia 2008         55.0              100   55.000000
## 38          Serbia 2008           NA               NA  100.000000
## 39     Switzerland 2008           NA               NA  100.000000
## 40  United Kingdom 2008         29.0              100   29.000000
## 41       Indonesia 2009        100.0              100  100.000000
## 42          Serbia 2009           NA               NA  100.000000
## 43     Switzerland 2009           NA               NA  100.000000
## 44  United Kingdom 2009         15.0              100   15.000000
## 45       Indonesia 2010           NA               NA  100.000000
## 46          Serbia 2010           NA               NA  100.000000
## 47     Switzerland 2010           NA               NA  100.000000
## 48  United Kingdom 2010           NA               NA  100.000000
## 49       Indonesia 2011           NA               NA  100.000000
## 50          Serbia 2011           NA               NA  100.000000
## 51     Switzerland 2011         53.0              105   50.476190
## 52  United Kingdom 2011           NA               NA  100.000000
## 53       Indonesia 2012         56.0              112   50.000000
## 54          Serbia 2012           NA               NA  100.000000
## 55     Switzerland 2012           NA               NA  100.000000
## 56  United Kingdom 2012           NA               NA  100.000000
## 57       Indonesia 2013         23.0              118   19.491525
## 58          Serbia 2013           NA               NA  100.000000
## 59     Switzerland 2013         31.0              118   26.271186
## 60  United Kingdom 2013           NA               NA  100.000000
## 61       Indonesia 2014         98.5              120   82.083333
## 62          Serbia 2014           NA               NA  100.000000
## 63     Switzerland 2014         65.0              120   54.166667
## 64  United Kingdom 2014         54.0              120   45.000000
## 65       Indonesia 2015         79.0              126   62.698413
## 66          Serbia 2015           NA               NA  100.000000
## 67     Switzerland 2015         66.0              126   52.380952
## 68  United Kingdom 2015           NA               NA  100.000000
## 69       Indonesia 2016           NA               NA  100.000000
## 70          Serbia 2016           NA               NA  100.000000
## 71     Switzerland 2016         71.0              128   55.468750
## 72  United Kingdom 2016         92.0              128   71.875000
## 73       Indonesia 2017         67.5              133   50.751880
## 74          Serbia 2017           NA               NA  100.000000
## 75     Switzerland 2017         34.0              133   25.563910
## 76  United Kingdom 2017        106.0              133   79.699248
## 77       Indonesia 2018         41.0              140   29.285714
## 78          Serbia 2018           NA               NA  100.000000
## 79     Switzerland 2018         29.0              140   20.714286
## 80  United Kingdom 2018         43.5              140   31.071429
## 81       Indonesia 2019         65.0              135   48.148148
## 82          Serbia 2019         42.0              135   31.111111
## 83     Switzerland 2019         43.0              135   31.851852
## 84  United Kingdom 2019         17.0              135   12.592593
## 85       Indonesia 2020         32.0              117   27.350427
## 86          Serbia 2020         24.0              117   20.512821
## 87     Switzerland 2020         40.0              117   34.188034
## 88  United Kingdom 2020          6.0              117    5.128205
## 89       Indonesia 2021         63.5              132   48.106061
## 90          Serbia 2021         36.0              132   27.272727
## 91     Switzerland 2021          5.0              132    3.787879
## 92  United Kingdom 2021         14.5              132   10.984848
## 93       Indonesia 2022         78.0              124   62.903226
## 94          Serbia 2022         15.0              124   12.096774
## 95     Switzerland 2022         44.0              124   35.483871
## 96  United Kingdom 2022         32.0              124   25.806452
## 97       Indonesia 2023         77.0              130   59.230769
## 98          Serbia 2023         21.0              130   16.153846
## 99     Switzerland 2023         38.0              130   29.230769
## 100 United Kingdom 2023         28.0              130   21.538462
facet_plot <- ggplot(top_4_countries_data, aes(x = Year, y = Scaled_Rank)) +
  geom_line() +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  scale_y_continuous(breaks = seq(0, 100, by = 25), limits = c(0, 100)) +
  scale_x_continuous(breaks = seq(1999, 2023, by = 4), limits = c(1999, 2023)) +
  facet_wrap(~ Country, scales = "free_y") +
  labs(
    title = "Ranking Over Years for Top 4 Countries with Lowest Slopes",
    x = NULL,
    y = "Scaled Rank"
  ) + theme_HS

bar_plot <- ggplot(top_10_countries, aes(x = reorder(Country, -Slope), y = Slope)) +
  geom_bar(stat = "identity", fill = "brown") +
  coord_flip() +
  labs(
    title = "Top 10 Countries by Linear Regression Slope",
    subtitle = "Indicating the Performance Trend Over Years",
    x = NULL,
    y = "Slope (Performance Trend)"
  ) + theme_HS

facet_plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 59 rows containing missing values (`geom_smooth()`).

bar_plot

ggsave("plot9.png", facet_plot, width = 12, height = 8, units = "in", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 59 rows containing missing values (`geom_smooth()`).
ggsave("plot8.png", bar_plot, width = 12, height = 8, units = "in", dpi = 300)

cuba_data <- ranking_data_complete %>%
  filter(Country == "Cuba")

cuba_plot <- ggplot(cuba_data, aes(x = Year, y = Scaled_Rank)) +
  geom_line() +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  scale_y_continuous(breaks = seq(0, 100, by = 25), limits = c(0, 100)) +
  scale_x_continuous(breaks = seq(1999, 2023, by = 4), limits = c(1999, 2023)) +
  labs(
    title = "Ranking Over Years for Cuba",
    x = NULL,
    y = "Scaled Rank"
  ) + theme_HS

cuba_plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing missing values (`geom_smooth()`).

ggsave("plot10.png", cuba_plot, width = 12, height = 6, units = "in", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing missing values (`geom_smooth()`).
library(leaflet)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(htmltools)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
world <- st_read("data/world.shp")
## Reading layer `world' from data source 
##   `/home/brayand/Storage/School/from-data-to-knowledge-interpretation-visualization-presentation-course/Final Project/data/world.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 197 features and 63 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -55.8917 xmax: 180 ymax: 83.59961
## Geodetic CRS:  GCS_unknown
data <- read_csv("data/icpc-full.csv")
## Rows: 2562 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): Host, City, Venue, University, Country, Team, Contestant 1, Conte...
## dbl   (6): Year, Rank, Score, Total, Score Percentage, Penalty
## lgl   (4): Gold, Silver, Bronze, Honorable
## date  (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$Host[data$Host == "Czechia"] <- "Czech Rep."
data$Country[data$Country == "Czechia"] <- "Czech Rep."

host_counts <- data %>%
  select(Year, Host) %>%
  distinct() %>%
  group_by(Host) %>%
  summarise(Count = n()) %>%
  ungroup()

total_teams_per_year <- icpc_data %>%
  group_by(Year) %>%
  summarize(Total_Teams_Year = n())

ranking_data <- icpc_data %>%
  group_by(Country, Year) %>%
  summarize(Average_Rank = ifelse(all(is.na(Rank)), NA, mean(Rank, na.rm = TRUE))) %>%
  ungroup()
## `summarise()` has grouped output by 'Country'. You can override using the
## `.groups` argument.
ranking_data <- ranking_data %>%
  left_join(total_teams_per_year, by = "Year")

ranking_data <- ranking_data %>%
  mutate(Average_Rank = ifelse(is.na(Average_Rank), Total_Teams_Year, Average_Rank))

print(ranking_data)
## # A tibble: 884 × 4
##    Country      Year Average_Rank Total_Teams_Year
##    <chr>       <int>        <dbl>            <int>
##  1 Afghanistan  2020          115              117
##  2 Argentina    1999           38               62
##  3 Argentina    2000           27               60
##  4 Argentina    2001           51               64
##  5 Argentina    2002           10               64
##  6 Argentina    2003           12               68
##  7 Argentina    2004           53               73
##  8 Argentina    2005           44               78
##  9 Argentina    2006           56               83
## 10 Argentina    2007           14               87
## # ℹ 874 more rows
ranking_data <- ranking_data %>%
  mutate(Scaled_Rank = (Average_Rank / Total_Teams_Year) * 100)

all_years <- seq(min(ranking_data$Year), max(ranking_data$Year))
all_countries <- unique(ranking_data$Country)

all_combinations <- expand.grid(Country = all_countries, Year = all_years)

ranking_data_complete <- all_combinations %>%
  left_join(ranking_data, by = c("Country", "Year")) %>%
  mutate(Scaled_Rank = ifelse(is.na(Scaled_Rank), 100, Scaled_Rank))

average_ranking <- ranking_data_complete %>%
  group_by(Country) %>%
  summarize(Average_Rank = mean(Scaled_Rank, na.rm = TRUE)) %>%
  arrange(Average_Rank)

world <- world %>%
  left_join(host_counts, by = c("name" = "Host")) %>%
  left_join(average_ranking, by = c("name" = "Country"))

world$Count[is.na(world$Count)] <- 0

pal_host <- colorNumeric("YlOrRd", domain = world$Count)

pal_rank <- colorNumeric("YlGnBu", domain = world$Average_Rank, reverse = TRUE)

total_hosted <- sum(world$Count)

International Collegiate Programming Contest (ICPC) Heatmap of Host Countries

map_host <- leaflet(data = world) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal_host(Count),
    weight = 1,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    highlight = highlightOptions(
      weight = 3,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = ~paste(name, ": ", Count, " times hosted"),
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    )
  ) %>%
  addLegend(
    pal = pal_host,
    values = ~Count,
    opacity = 0.7,
    title = "Times Hosted",
    position = "bottomright"
  )

map_host

International Collegiate Programming Contest (ICPC) Heatmap of Ranking Average

world <- world %>%
  filter(!is.na(Average_Rank))

map_rank <- leaflet(data = world) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal_rank(Average_Rank),
    weight = 1,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    highlight = highlightOptions(
      weight = 3,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = ~ifelse(is.na(Average_Rank), paste(name, ": No Participation"), paste(name, ": Average Rank ", round(Average_Rank, 2))),
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    )
  ) %>%
  addLegend(
    pal = pal_rank,
    values = ~Average_Rank,
    opacity = 0.7,
    title = "Average Ranking",
    position = "bottomright"
  )

map_rank